(*
Formal Languages

In this chapter we shall apply our notion of DeqSet, and the list operations 
defined in Chapter 4 to formal languages. A formal language is an arbitrary set
of words over a given alphabet, that we shall represent as a predicate over words. 
*)
include "tutorial/chapter5.ma".

(* A word (or string) over an alphabet S is just a list of elements of S.*)
definition word ≝ λS:DeqSet.list S.

(* For any alphabet there is only one word of length 0, the empty word, which is 
denoted by ϵ .*) 

notation "ϵ" non associative with precedence 90 for @{ 'epsilon }.
interpretation "epsilon" 'epsilon = (nil ?).

(* The operation that consists in appending two words to form a new word, whose 
length is the sum of the lengths of the original words is called concatenation.
String concatenation is just the append operation over lists, hence there is no
point to define it. Similarly, many of its properties, such as the fact that 
concatenating a word with the empty word gives the original word, follow by 
general results over lists.
*)

(*
Operations over languages

Languages inherit all the basic operations for sets, namely union, intersection, 
complementation, substraction, and so on. In addition, we may define some new 
operations induced by string concatenation, and in particular the concatenation 
A · B of two languages A and B, the so called Kleene's star A* of A and the 
derivative of a language A w.r.t. a given character a. *)

definition cat : ∀S,l1,l2,w.Prop ≝ 
  λS.λl1,l2.λw:word S.∃w1,w2.w1 @ w2 = w ∧ l1 w1 ∧ l2 w2.

notation "a · b" non associative with precedence 60 for @{ 'middot $a $b}.
interpretation "cat lang" 'middot a b = (cat ? a b).


(* Given a language l, the Kleene's star of l, denoted by l*, is the set of 
finite-length strings that can be generated by concatenating arbitrary strings of 
l. In other words, w belongs to l* is and only if there exists a list of strings 
w1,w2,...wk all belonging to l, such that l = w1w2...wk. 
We need to define the latter operations. The following flatten function takes in 
input a list of words and concatenates them together. *)

let rec flatten (S : DeqSet) (l : list (word S)) on l : word S ≝ 
match l with [ nil ⇒ [ ] | cons w tl ⇒ w @ flatten ? tl ].

(* Given a list of words l and a language r, (conjunct l r) is true if and only if
all words in l are in r, that is for every w in l, r w holds. *)

let rec conjunct (S : DeqSet) (l : list (word S)) (r : word S → Prop) on l: Prop ≝
match l with [ nil ⇒ True | cons w tl ⇒ r w ∧ conjunct ? tl r ]. 

(* We are ready to give the formal definition of the Kleene's star of l:
a word w belongs to l* is and only if there exists a list of strings 
lw such that (conjunct lw l) and  l = flatten lw. *)

definition star ≝ λS.λl.λw:word S.∃lw.flatten ? lw = w ∧ conjunct ? lw l. 
notation "a ^ *" non associative with precedence 90 for @{ 'star $a}.
interpretation "star lang" 'star l = (star ? l).

(* The derivative of a language A with respect to a character a is the set of
all strings w such that aw is in A. *)

definition deriv ≝ λS.λA:word S → Prop.λa,w. A (a::w).

(* 
Language equalities

Equality between languages is just the usual extensional equality between
sets. The operation of concatenation behaves well with respect to this equality. *)

lemma cat_ext_l: ∀S.∀A,B,C:word S →Prop. 
  A =1 C  → A · B =1 C · B.
#S #A #B #C #H #w % * #w1 * #w2 * * #eqw #inw1 #inw2
cases (H w1) /6/
qed.

lemma cat_ext_r: ∀S.∀A,B,C:word S →Prop. 
  B =1 C → A · B =1 A · C.
#S #A #B #C #H #w % * #w1 * #w2 * * #eqw #inw1 #inw2
cases (H w2) /6/ 
qed.
  
(* Concatenating a language with the empty language results in the
empty language. *) 
lemma cat_empty_l: ∀S.∀A:word S→Prop. ∅ · A =1 ∅.
#S #A #w % [|*] * #w1 * #w2 * * #_ *
qed.

(* Concatenating a language l with the singleton language containing the
empty string, results in the language l; that is {ϵ} is a left and right 
unit with respect to concatenation. *)

lemma epsilon_cat_r: ∀S.∀A:word S →Prop.
  A · {ϵ} =1  A. 
#S #A #w %
  [* #w1 * #w2 * * #eqw #inw1 normalize #eqw2 <eqw //
  |#inA @(ex_intro … w) @(ex_intro … [ ]) /3/
  ]
qed.

lemma epsilon_cat_l: ∀S.∀A:word S →Prop.
  {ϵ} · A =1  A. 
#S #A #w %
  [* #w1 * #w2 * * #eqw normalize #eqw2 <eqw <eqw2 //
  |#inA @(ex_intro … ϵ) @(ex_intro … w) /3/
  ]
  qed.

(* Concatenation is distributive w.r.t. union. *)

lemma distr_cat_r: ∀S.∀A,B,C:word S →Prop.
  (A ∪ B) · C =1  A · C ∪ B · C. 
#S #A #B #C #w %
  [* #w1 * #w2 * * #eqw * /6/ |* * #w1 * #w2 * * /6/] 
qed.

lemma distr_cat_r_eps: ∀S.∀A,C:word S →Prop.
  (A ∪ {ϵ}) · C =1  A · C ∪ C. 
  #S #A #C @eqP_trans [|@distr_cat_r |@eqP_union_l @epsilon_cat_l]
qed.

(* The following is a major property of derivatives *)

lemma deriv_middot: ∀S,A,B,a. ¬ A ϵ → deriv S (A·B) a =1 (deriv S A a) · B.
#S #A #B #a #noteps #w normalize %
  [* #w1 cases w1 
    [* #w2 * * #_ #Aeps @False_ind /2/
    |#b #w2 * #w3 * * whd in ⊢ ((??%?)→?); #H destruct
     #H #H1 @(ex_intro … w2) @(ex_intro … w3) % // % //
    ]
  |* #w1 * #w2 * * #H #H1 #H2 @(ex_intro … (a::w1))
   @(ex_intro … w2) % // % normalize //
  ]
qed. 

(* 
Main Properties of Kleene's star

We conclude this section with some important properties of Kleene's
star that will be used in the following chapters. *)

lemma espilon_in_star: ∀S.∀A:word S → Prop.
  A^* ϵ.
#S #A @(ex_intro … [ ]) normalize /2/
qed.

lemma cat_to_star:∀S.∀A:word S → Prop.
  ∀w1,w2. A w1 → A^* w2 → A^* (w1@w2).
#S #A #w1 #w2 #Aw * #l * #H #H1 @(ex_intro … (w1::l)) 
% normalize /2/
qed.

lemma fix_star: ∀S.∀A:word S → Prop. 
  A^* =1 A · A^* ∪ {ϵ}.
#S #A #w %
  [* #l generalize in match w; -w cases l [normalize #w * /2/]
   #w1 #tl #w * whd in ⊢ ((??%?)→?); #eqw whd in ⊢ (%→?); *
   #w1A #cw1 %1 @(ex_intro … w1) @(ex_intro … (flatten S tl))
   % /2/ whd @(ex_intro … tl) /2/
  |* [2: whd in ⊢ (%→?); #eqw <eqw //]
   * #w1 * #w2 * * #eqw <eqw @cat_to_star 
  ]
qed.

lemma star_fix_eps : ∀S.∀A:word S → Prop.
  A^* =1 (A - {ϵ}) · A^* ∪ {ϵ}.  
#S #A #w %
  [* #l elim l 
    [* whd in ⊢ ((??%?)→?); #eqw #_ %2 <eqw // 
    |* [#tl #Hind * #H * #_ #H2 @Hind % [@H | //]
       |#a #w1 #tl #Hind * whd in ⊢ ((??%?)→?); #H1 * #H2 #H3 %1 
        @(ex_intro … (a::w1)) @(ex_intro … (flatten S tl)) %
         [% [@H1 | normalize % /2/] |whd @(ex_intro … tl) /2/]
       ]
    ]
  |* [* #w1 * #w2 * * #eqw * #H1 #_ <eqw @cat_to_star //
     | whd in ⊢ (%→?); #H <H //
     ]
  ]
qed. 
     
lemma star_epsilon: ∀S:DeqSet.∀A:word S → Prop.
  A^* ∪ {ϵ} =1 A^*.
#S #A #w % /2/ * // 
qed.
  